* ---- 
* Clean key network measures and aggregate them to the person level
* ---- 

* Load the processed data, replacing whatever is currently in memory
use "./data/processed/CE_processed.dta", clear

* Person identifier: one integer code per distinct (year, id) combination
egen pid = group(year id)

* Numeric, value-labelled copy of the string variable state
encode state, generate(state_code)

* Indicator for non-relative ties, computed as 1 minus a_relative
* (presumably a_relative is coded 0/1 - confirm against the codebook)
generate a_nonrelative = 1 - a_relative

* Cap network size at 4.
* Stata treats missing values as larger than any number, so the comparison
* n_size4 > 4 alone is also true for missing n_size4 and would silently
* recode missing network sizes to 4. Exclude missing values explicitly.
replace n_size4 = 4 if n_size4 > 4 & !missing(n_size4)

* Political discussion network size per person.
* In the Poisson regression models n_talkpol is used as the offset measure,
* so its natural log is stored as well. ln() returns missing when
* n_talkpol is 0, i.e. for persons with no political discussion ties.
* egen total() is the documented name of the older sum() function; both
* treat missing values as 0 when summing.
bysort pid: egen n_talkpol = total(a_talkpol)
generate log_n_talkpol = ln(n_talkpol)

* Count measures for the Poisson models later.
* Each count is computed within person over political discussion ties only
* (a_talkpol == 1); rows with a_talkpol != 1 are left missing.
foreach k in 3 4 3a 4a {
	bysort pid: egen n_homophily`k' = total(a_homophily_`k') if a_talkpol == 1
}

foreach grp in middle nonrelative relative {
	bysort pid: egen n_`grp' = total(a_`grp') if a_talkpol == 1
}

* Within-person means over political discussion ties only
foreach grp in middle relative {
	bysort pid: egen p_`grp' = mean(a_`grp') if a_talkpol == 1
}

* Proportion measures: homophily counts divided by the political
* discussion network size
foreach k in 3 4 4a {
	generate p_homophily`k' = n_homophily`k' / n_talkpol
}

* Binary indicators; left missing wherever the underlying proportion is missing
* pol_all_same : the homophily proportion equals 1
* pol_one_diff : the homophily proportion is below 1
generate pol_all_same = (p_homophily4 == 1) if !missing(p_homophily4)
generate pol_one_diff = (p_homophily4 < 1) if !missing(p_homophily4)

* Same below-1 indicator for the alternative homophily measures
foreach k in 3 4a {
	generate pol_one_diff`k' = (p_homophily`k' < 1) if !missing(p_homophily`k')
}

* Indicators for a strictly positive share of relative / middle ties
foreach grp in relative middle {
	generate i_`grp' = (p_`grp' > 0) if !missing(p_`grp')
}

label variable i_relative "Talking to relatives"
label variable i_middle "Talking to the middle"

* Missing ad counts are set to 0 in every year except 1992,
* where they are left missing
foreach advar in n_ad_year n_ad {
	replace `advar' = 0 if `advar' == . & year != 1992
}

* Standardize each advertisement measure separately within 2000, 2008 and
* 2016, using that year's mean and standard deviation from summarize.
* Observations from any other year keep a missing standardized value.
foreach advar in n_ad_year n_ad {
	generate std_`advar' = .
	foreach yr in 2000 2008 2016 {
		summarize `advar' if year == `yr'
		replace std_`advar' = (`advar' - `r(mean)') / `r(sd)' if year == `yr'
	}
}

* Tone-related measures: each tone's share of the summed attack, promote
* and contrast tone totals. The denominators are kept in local macros so
* the same expression is reused for all three shares.
local tone_total (sum_tone_attack+sum_tone_promote+sum_tone_contrast)
foreach tone in attack promote contrast {
	generate p_`tone' = sum_tone_`tone' / `tone_total'
}

local tone_total_year (sum_tone_attack_year+sum_tone_promote_year+sum_tone_contrast_year)
foreach tone in attack promote contrast {
	generate p_`tone'_year = sum_tone_`tone'_year / `tone_total_year'
}

* Impute ad tone measures: set every share to 0 where the matching
* ad count is 0
foreach tone in attack promote contrast {
	replace p_`tone' = 0 if n_ad == 0
}

foreach tone in attack promote contrast {
	replace p_`tone'_year = 0 if n_ad_year == 0
}



* Variable labels: respondent characteristics
label variable r_age "Age"
label variable r_female "Female"
label variable r_educ "Education"
label variable r_married "Married"
label variable r_working "Currently working"
label variable order "Name order in network rosters"
label variable r_race "Race"

* Value labels for the race categories, attached to r_race
label define race 1 "White" 2 "Black" 3 "Hispanic" 4 "Other"
label values r_race race

* Variable labels: tie-level and network measures
label variable a_homophily_4 "Political similarity"
label variable a_uncertainty "Political uncertainty"
label variable a_middle "Non-partisan"
label variable a_relative "Kinship"
label variable n_size "Network size"
label variable n_size4 "Network size"
label variable isolation "Political isolation"
label variable year "Year"
label variable state_code "State"

* Variable labels: election context and political engagement
label variable close_election "Close election"
label variable r_pol_interest "Political Interest"
label variable r_party_intensity "Partisan Strength"

* Variable labels: advertisement volume
* (the standardized versions share the labels of the raw counts)
label variable n_ad_year "Total Number of Political Ad"
label variable n_ad "Daily Number of Political Ad"
label variable std_n_ad_year "Total Number of Political Ad"
label variable std_n_ad "Daily Number of Political Ad"

* Variable labels: advertisement tone shares
label variable p_attack "P(ad's tone = attack)"
label variable p_promote "P(ad's tone = promote)"
label variable p_contrast "P(ad's tone = contrast)"

label variable p_attack_year "P(ad's tone = attack)"
label variable p_promote_year "P(ad's tone = promote)"
label variable p_contrast_year "P(ad's tone = contrast)"

* Write the cleaned regression data, overwriting any existing file
save "./data/processed/CE_cleaned_reg.dta", replace

